Transcriptome Data Analysis - hands-on session - IN A GLANCE
@FedeBioinfoThis tutorial is intended to be used “at a glance”:
transcriptomics_practical documentYou can consider this as a very much squeezed (yet well squeezed) and complete (yup!) version of the whole (the real, whole) analysis workflow when dealing with RNA-seq data.
Ready? Let’s go!
macrophagelibrary("macrophage")
dir <- system.file("extdata", package = "macrophage")
coldata <- read.csv(file.path(dir, "coldata.csv"))[, c(1, 2, 3, 5)]
## how many samples do we have?
## what are the important experimental factors?
coldata
# names sample_id line_id condition_name
# 1 SAMEA103885102 diku_A diku_1 naive
# 2 SAMEA103885347 diku_B diku_1 IFNg
# 3 SAMEA103885043 diku_C diku_1 SL1344
# 4 SAMEA103885392 diku_D diku_1 IFNg_SL1344
# 5 SAMEA103885182 eiwy_A eiwy_1 naive
# 6 SAMEA103885136 eiwy_B eiwy_1 IFNg
# 7 SAMEA103885413 eiwy_C eiwy_1 SL1344
# 8 SAMEA103884967 eiwy_D eiwy_1 IFNg_SL1344
# 9 SAMEA103885368 fikt_A fikt_3 naive
# 10 SAMEA103885218 fikt_B fikt_3 IFNg
# 11 SAMEA103885319 fikt_C fikt_3 SL1344
# 12 SAMEA103885004 fikt_D fikt_3 IFNg_SL1344
# 13 SAMEA103885284 ieki_A ieki_2 naive
# 14 SAMEA103885059 ieki_B ieki_2 IFNg
# 15 SAMEA103884898 ieki_C ieki_2 SL1344
# 16 SAMEA103885157 ieki_D ieki_2 IFNg_SL1344
# 17 SAMEA103885111 podx_A podx_1 naive
# 18 SAMEA103884919 podx_B podx_1 IFNg
# 19 SAMEA103885276 podx_C podx_1 SL1344
# 20 SAMEA103885021 podx_D podx_1 IFNg_SL1344
# 21 SAMEA103885262 qaqx_A qaqx_1 naive
# 22 SAMEA103885228 qaqx_B qaqx_1 IFNg
# 23 SAMEA103885308 qaqx_C qaqx_1 SL1344
# 24 SAMEA103884949 qaqx_D qaqx_1 IFNg_SL1344
coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz")
## these are the files output from salmon, containing all info on the quantifications
coldata$files
# [1] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885102/quant.sf.gz"
# [2] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885347/quant.sf.gz"
# [3] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885043/quant.sf.gz"
# [4] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885392/quant.sf.gz"
# [5] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885182/quant.sf.gz"
# [6] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885136/quant.sf.gz"
# [7] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885413/quant.sf.gz"
# [8] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103884967/quant.sf.gz"
# [9] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885368/quant.sf.gz"
# [10] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885218/quant.sf.gz"
# [11] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885319/quant.sf.gz"
# [12] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885004/quant.sf.gz"
# [13] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885284/quant.sf.gz"
# [14] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885059/quant.sf.gz"
# [15] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103884898/quant.sf.gz"
# [16] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885157/quant.sf.gz"
# [17] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885111/quant.sf.gz"
# [18] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103884919/quant.sf.gz"
# [19] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885276/quant.sf.gz"
# [20] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885021/quant.sf.gz"
# [21] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885262/quant.sf.gz"
# [22] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885228/quant.sf.gz"
# [23] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103885308/quant.sf.gz"
# [24] "/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/macrophage/extdata/quants/SAMEA103884949/quant.sf.gz"
suppressPackageStartupMessages({
library("tximeta")
library("DESeq2")
library("org.Hs.eg.db")
library("SummarizedExperiment")
})
## at the transcript level...
st <- tximeta(coldata = coldata, type = "salmon", dropInfReps = TRUE)
head(assay(st, "counts"), 3)
# SAMEA103885102 SAMEA103885347 SAMEA103885043 SAMEA103885392
# ENST00000456328.2 22.058 12.404 5.44 0.000
# ENST00000450305.2 0.000 0.000 0.00 0.000
# ENST00000488147.1 119.092 180.069 161.55 93.747
# SAMEA103885182 SAMEA103885136 SAMEA103885413 SAMEA103884967
# ENST00000456328.2 0.0 10.833 5.119 6.708
# ENST00000450305.2 0.0 0.000 0.000 0.000
# ENST00000488147.1 145.5 141.607 189.152 98.019
# SAMEA103885368 SAMEA103885218 SAMEA103885319 SAMEA103885004
# ENST00000456328.2 0.000 4.484 0.000 0.000
# ENST00000450305.2 0.000 0.000 0.000 0.000
# ENST00000488147.1 132.243 88.429 96.871 66.813
# SAMEA103885284 SAMEA103885059 SAMEA103884898 SAMEA103885157
# ENST00000456328.2 27.337 12.401 0.000 6.107
# ENST00000450305.2 0.000 0.000 0.000 0.000
# ENST00000488147.1 250.127 211.475 144.212 134.842
# SAMEA103885111 SAMEA103884919 SAMEA103885276 SAMEA103885021
# ENST00000456328.2 23.333 11.794 4.670 0.000
# ENST00000450305.2 0.000 0.000 0.000 0.000
# ENST00000488147.1 205.167 151.599 134.082 69.402
# SAMEA103885262 SAMEA103885228 SAMEA103885308 SAMEA103884949
# ENST00000456328.2 7.629 3.907 9.195 0.000
# ENST00000450305.2 0.000 0.000 0.000 0.000
# ENST00000488147.1 154.885 125.882 183.322 53.233
rowData(st)
# DataFrame with 205870 rows and 3 columns
# tx_id gene_id tx_name
# <integer> <CharacterList> <character>
# ENST00000456328.2 1 ENSG00000223972.5 ENST00000456328.2
# ENST00000450305.2 2 ENSG00000223972.5 ENST00000450305.2
# ENST00000488147.1 9483 ENSG00000227232.5 ENST00000488147.1
# ENST00000619216.1 9484 ENSG00000278267.1 ENST00000619216.1
# ENST00000473358.1 3 ENSG00000243485.5 ENST00000473358.1
# ... ... ... ...
# ENST00000361681.2 206692 ENSG00000198695.2 ENST00000361681.2
# ENST00000387459.1 206693 ENSG00000210194.1 ENST00000387459.1
# ENST00000361789.2 206684 ENSG00000198727.2 ENST00000361789.2
# ENST00000387460.2 206685 ENSG00000210195.2 ENST00000387460.2
# ENST00000387461.2 206694 ENSG00000210196.2 ENST00000387461.2
colData(st)
# DataFrame with 24 rows and 4 columns
# names sample_id line_id condition_name
# <character> <character> <character> <character>
# SAMEA103885102 SAMEA103885102 diku_A diku_1 naive
# SAMEA103885347 SAMEA103885347 diku_B diku_1 IFNg
# SAMEA103885043 SAMEA103885043 diku_C diku_1 SL1344
# SAMEA103885392 SAMEA103885392 diku_D diku_1 IFNg_SL1344
# SAMEA103885182 SAMEA103885182 eiwy_A eiwy_1 naive
# ... ... ... ... ...
# SAMEA103885021 SAMEA103885021 podx_D podx_1 IFNg_SL1344
# SAMEA103885262 SAMEA103885262 qaqx_A qaqx_1 naive
# SAMEA103885228 SAMEA103885228 qaqx_B qaqx_1 IFNg
# SAMEA103885308 SAMEA103885308 qaqx_C qaqx_1 SL1344
# SAMEA103884949 SAMEA103884949 qaqx_D qaqx_1 IFNg_SL1344
## at the gene level...
sg <- tximeta::summarizeToGene(st)
sg
# class: RangedSummarizedExperiment
# dim: 58294 24
# metadata(7): tximetaInfo quantInfo ... txdbInfo assignRanges
# assays(3): counts abundance length
# rownames(58294): ENSG00000000003.14 ENSG00000000005.5 ...
# ENSG00000285993.1 ENSG00000285994.1
# rowData names(2): gene_id tx_ids
# colnames(24): SAMEA103885102 SAMEA103885347 ... SAMEA103885308
# SAMEA103884949
# colData names(4): names sample_id line_id condition_name
## can you guess what happened?
sg <- tximeta::addIds(sg, "SYMBOL", gene = TRUE)
sg
# class: RangedSummarizedExperiment
# dim: 58294 24
# metadata(7): tximetaInfo quantInfo ... txdbInfo assignRanges
# assays(3): counts abundance length
# rownames(58294): ENSG00000000003.14 ENSG00000000005.5 ...
# ENSG00000285993.1 ENSG00000285994.1
# rowData names(3): gene_id tx_ids SYMBOL
# colnames(24): SAMEA103885102 SAMEA103885347 ... SAMEA103885308
# SAMEA103884949
# colData names(4): names sample_id line_id condition_name
head(rowData(sg))
# DataFrame with 6 rows and 3 columns
# gene_id
# <character>
# ENSG00000000003.14 ENSG00000000003.14
# ENSG00000000005.5 ENSG00000000005.5
# ENSG00000000419.12 ENSG00000000419.12
# ENSG00000000457.13 ENSG00000000457.13
# ENSG00000000460.16 ENSG00000000460.16
# ENSG00000000938.12 ENSG00000000938.12
# tx_ids
# <CharacterList>
# ENSG00000000003.14 ENST00000612152.4,ENST00000373020.8,ENST00000614008.4,...
# ENSG00000000005.5 ENST00000373031.4,ENST00000485971.1
# ENSG00000000419.12 ENST00000371588.9,ENST00000466152.5,ENST00000371582.8,...
# ENSG00000000457.13 ENST00000367771.10,ENST00000367770.5,ENST00000367772.8,...
# ENSG00000000460.16 ENST00000498289.5,ENST00000472795.5,ENST00000496973.5,...
# ENSG00000000938.12 ENST00000374005.7,ENST00000399173.5,ENST00000374004.5,...
# SYMBOL
# <character>
# ENSG00000000003.14 TSPAN6
# ENSG00000000005.5 TNMD
# ENSG00000000419.12 DPM1
# ENSG00000000457.13 SCYL3
# ENSG00000000460.16 FIRRM
# ENSG00000000938.12 FGR
## add gene symbols
sg <- tximeta::addIds(sg, "SYMBOL", gene = TRUE)
sg
# class: RangedSummarizedExperiment
# dim: 58294 24
# metadata(7): tximetaInfo quantInfo ... txdbInfo assignRanges
# assays(3): counts abundance length
# rownames(58294): ENSG00000000003.14 ENSG00000000005.5 ...
# ENSG00000285993.1 ENSG00000285994.1
# rowData names(3): gene_id tx_ids SYMBOL
# colnames(24): SAMEA103885102 SAMEA103885347 ... SAMEA103885308
# SAMEA103884949
# colData names(4): names sample_id line_id condition_name
head(rowData(sg))
# DataFrame with 6 rows and 3 columns
# gene_id
# <character>
# ENSG00000000003.14 ENSG00000000003.14
# ENSG00000000005.5 ENSG00000000005.5
# ENSG00000000419.12 ENSG00000000419.12
# ENSG00000000457.13 ENSG00000000457.13
# ENSG00000000460.16 ENSG00000000460.16
# ENSG00000000938.12 ENSG00000000938.12
# tx_ids
# <CharacterList>
# ENSG00000000003.14 ENST00000612152.4,ENST00000373020.8,ENST00000614008.4,...
# ENSG00000000005.5 ENST00000373031.4,ENST00000485971.1
# ENSG00000000419.12 ENST00000371588.9,ENST00000466152.5,ENST00000371582.8,...
# ENSG00000000457.13 ENST00000367771.10,ENST00000367770.5,ENST00000367772.8,...
# ENSG00000000460.16 ENST00000498289.5,ENST00000472795.5,ENST00000496973.5,...
# ENSG00000000938.12 ENST00000374005.7,ENST00000399173.5,ENST00000374004.5,...
# SYMBOL
# <character>
# ENSG00000000003.14 TSPAN6
# ENSG00000000005.5 TNMD
# ENSG00000000419.12 DPM1
# ENSG00000000457.13 SCYL3
# ENSG00000000460.16 FIRRM
# ENSG00000000938.12 FGR
colData(sg)
# DataFrame with 24 rows and 4 columns
# names sample_id line_id condition_name
# <character> <character> <character> <character>
# SAMEA103885102 SAMEA103885102 diku_A diku_1 naive
# SAMEA103885347 SAMEA103885347 diku_B diku_1 IFNg
# SAMEA103885043 SAMEA103885043 diku_C diku_1 SL1344
# SAMEA103885392 SAMEA103885392 diku_D diku_1 IFNg_SL1344
# SAMEA103885182 SAMEA103885182 eiwy_A eiwy_1 naive
# ... ... ... ... ...
# SAMEA103885021 SAMEA103885021 podx_D podx_1 IFNg_SL1344
# SAMEA103885262 SAMEA103885262 qaqx_A qaqx_1 naive
# SAMEA103885228 SAMEA103885228 qaqx_B qaqx_1 IFNg
# SAMEA103885308 SAMEA103885308 qaqx_C qaqx_1 SL1344
# SAMEA103884949 SAMEA103884949 qaqx_D qaqx_1 IFNg_SL1344
table(colData(sg)$condition_name)
#
# IFNg IFNg_SL1344 naive SL1344
# 6 6 6 6
table(colData(sg)$line_id)
#
# diku_1 eiwy_1 fikt_3 ieki_2 podx_1 qaqx_1
# 4 4 4 4 4 4
## what is our aim? Find DE genes by which design specification?
colData(sg)$condition_name <- factor(colData(sg)$condition_name)
colData(sg)$condition_name <- relevel(colData(sg)$condition_name, ref = "naive")
colData(sg)$condition_name
# [1] naive IFNg SL1344 IFNg_SL1344 naive IFNg
# [7] SL1344 IFNg_SL1344 naive IFNg SL1344 IFNg_SL1344
# [13] naive IFNg SL1344 IFNg_SL1344 naive IFNg
# [19] SL1344 IFNg_SL1344 naive IFNg SL1344 IFNg_SL1344
# Levels: naive IFNg IFNg_SL1344 SL1344
## here's the main container for all infos!
dds <- DESeqDataSet(sg, design = ~ line_id + condition_name)
dds
# class: DESeqDataSet
# dim: 58294 24
# metadata(8): tximetaInfo quantInfo ... assignRanges version
# assays(3): counts abundance avgTxLength
# rownames(58294): ENSG00000000003.14 ENSG00000000005.5 ...
# ENSG00000285993.1 ENSG00000285994.1
# rowData names(3): gene_id tx_ids SYMBOL
# colnames(24): SAMEA103885102 SAMEA103885347 ... SAMEA103885308
# SAMEA103884949
# colData names(4): names sample_id line_id condition_name
## all set!
nrow(dds)
# [1] 58294
table(rowSums(assay(dds, "counts")) == 0)
#
# FALSE TRUE
# 38829 19465
keep <- rowSums(counts(dds)) > 1
dds <- dds[keep, ]
dim(dds)
# [1] 35683 24
## exploratory analysis and visualization
vsd <- DESeq2::vst(dds)
DESeq2::plotPCA(vsd, intgroup = "condition_name")
pcaExplorer::pcaplot(vsd, intgroup = "condition_name")
## differential expression analysis boils down to...
dds <- DESeq2::DESeq(dds)
DESeq2::plotDispEsts(dds)
## Default - SL1344 vs naive
res <- DESeq2::results(dds)
head(res)
# log2 fold change (MLE): condition name SL1344 vs naive
# Wald test p-value: condition name SL1344 vs naive
# DataFrame with 6 rows and 6 columns
# baseMean log2FoldChange lfcSE stat pvalue
# <numeric> <numeric> <numeric> <numeric> <numeric>
# ENSG00000000003.14 171.782 0.1171248 0.3008327 0.389335 6.97028e-01
# ENSG00000000419.12 967.527 0.0886824 0.0860008 1.031181 3.02456e-01
# ENSG00000000457.13 681.637 0.7109442 0.1973877 3.601766 3.16062e-04
# ENSG00000000460.16 263.282 -1.0347169 0.2179499 -4.747499 2.05947e-06
# ENSG00000000938.12 2646.887 1.6453083 0.2348000 7.007275 2.43005e-12
# ENSG00000000971.15 3045.742 0.7794411 0.4980265 1.565059 1.17569e-01
# padj
# <numeric>
# ENSG00000000003.14 8.11132e-01
# ENSG00000000419.12 4.55113e-01
# ENSG00000000457.13 1.22290e-03
# ENSG00000000460.16 1.18399e-05
# ENSG00000000938.12 3.14455e-11
# ENSG00000000971.15 2.20491e-01
## We'll instead focus on IFNgamma vs naive
res <- DESeq2::results(dds, contrast = c("condition_name", "IFNg", "naive"))
head(res)
# log2 fold change (MLE): condition_name IFNg vs naive
# Wald test p-value: condition name IFNg vs naive
# DataFrame with 6 rows and 6 columns
# baseMean log2FoldChange lfcSE stat pvalue
# <numeric> <numeric> <numeric> <numeric> <numeric>
# ENSG00000000003.14 171.782 -0.2829860 0.3010930 -0.939862 3.47288e-01
# ENSG00000000419.12 967.527 0.0383933 0.0856623 0.448194 6.54013e-01
# ENSG00000000457.13 681.637 1.2838945 0.1966270 6.529593 6.59486e-11
# ENSG00000000460.16 263.282 -1.4725128 0.2183088 -6.745092 1.52930e-11
# ENSG00000000938.12 2646.887 0.6747921 0.2351631 2.869464 4.11168e-03
# ENSG00000000971.15 3045.742 4.9869519 0.4966828 10.040518 1.01142e-23
# padj
# <numeric>
# ENSG00000000003.14 5.77116e-01
# ENSG00000000419.12 8.25573e-01
# ENSG00000000457.13 1.62287e-09
# ENSG00000000460.16 4.10702e-10
# ENSG00000000938.12 1.89738e-02
# ENSG00000000971.15 1.13504e-21
## what do these columns mean?
summary(res)
#
# out of 35683 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 0 (up) : 3718, 10%
# LFC < 0 (down) : 3437, 9.6%
# outliers [1] : 0, 0%
# low counts [2] : 12453, 35%
# (mean count < 2)
# [1] see 'cooksCutoff' argument of ?results
# [2] see 'independentFiltering' argument of ?results
hist(res$pvalue)
## Remove the genes that were filtered out in the independent filtering
hist(res$pvalue[!is.na(res$padj)])
## can we be more strict?
res.05 <- results(dds, alpha = 0.05,
contrast = c("condition_name", "IFNg", "naive"))
table(res.05$padj < 0.05)
#
# FALSE TRUE
# 16423 6115
## lower the false discovery rate threshold
resLFC1 <- results(dds, lfcThreshold = 1,
contrast = c("condition_name", "IFNg", "naive"))
summary(resLFC1)
#
# out of 35683 with nonzero total read count
# adjusted p-value < 0.1
# LFC > 1.00 (up) : 748, 2.1%
# LFC < -1.00 (down) : 469, 1.3%
# outliers [1] : 0, 0%
# low counts [2] : 0, 0%
# (mean count < 0)
# [1] see 'cooksCutoff' argument of ?results
# [2] see 'independentFiltering' argument of ?results
table(resLFC1$padj < 0.1)
#
# FALSE TRUE
# 34466 1217
## raise the log2 fold change threshold from 0 using the `lfcThreshold` argument of *results*
## checking individual genes
plotCounts(dds, gene = "ENSG00000126561.16", intgroup = "condition_name",
normalized = TRUE, transform = FALSE)
GeneTonic::gene_plot(dds, gene = "ENSG00000126561.16", intgroup = "condition_name",
normalized = TRUE, transform = FALSE)
## checking an overview on all genes
DESeq2::plotMA(res, ylim = c(-5, 5))
library("apeglm")
DESeq2::resultsNames(dds)
# [1] "Intercept" "line_id_eiwy_1_vs_diku_1"
# [3] "line_id_fikt_3_vs_diku_1" "line_id_ieki_2_vs_diku_1"
# [5] "line_id_podx_1_vs_diku_1" "line_id_qaqx_1_vs_diku_1"
# [7] "condition_name_IFNg_vs_naive" "condition_name_IFNg_SL1344_vs_naive"
# [9] "condition_name_SL1344_vs_naive"
res_ape <- DESeq2::lfcShrink(dds, coef = "condition_name_IFNg_vs_naive", type = "apeglm")
DESeq2::plotMA(res_ape, ylim = c(-5, 5))
## what happened here?
library("pheatmap")
stopifnot(rownames(vsd) == rownames(res))
## looking at the top 30 DE genes
mat <- assay(vsd)
rownames(mat) <- rowData(vsd)$SYMBOL
mat <- mat[head(order(res$padj), 30), ]
mat <- mat - rowMeans(mat)
df <- as.data.frame(colData(vsd)[, c("condition_name"), drop = FALSE])
pheatmap(mat, annotation_col = df)
## looking at the most variable ones
mat <- assay(vsd)
rownames(mat) <- rowData(vsd)$SYMBOL
topVarGenes <- head(order(rowVars(mat), decreasing = TRUE), 30)
mat <- mat[topVarGenes, ]
mat <- mat - rowMeans(mat)
df <- as.data.frame(colData(vsd)[, c("condition_name"), drop = FALSE])
pheatmap(mat, annotation_col = df)
## is there something in common here?
library("GeneTonic")
res$symbol <- rowData(dds)$SYMBOL
de_symbols_IFNg_vs_naive <- deseqresult2df(res, FDR = 0.05)$symbol
bg_ids <- rowData(dds)$SYMBOL[rowSums(counts(dds)) > 0]
library("topGO")
topgo_DE_macrophage_IFNg_vs_naive <- pcaExplorer::topGOtable(
DEgenes = de_symbols_IFNg_vs_naive,
BGgenes = bg_ids,
ontology = "BP",
mapping = "org.Hs.eg.db",
geneID = "symbol",
topTablerows = 500
)
library("clusterProfiler")
clupro_DE_macrophage_IFNg_vs_naive <- clusterProfiler::enrichGO(
gene = de_symbols_IFNg_vs_naive,
universe = bg_ids,
keyType = "SYMBOL",
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = FALSE
)
DT::datatable(topgo_DE_macrophage_IFNg_vs_naive[1:10,])
DT::datatable(as.data.frame(clupro_DE_macrophage_IFNg_vs_naive)[1:10,])
## can I export these tables to text/excel?
stopifnot(all(rownames(res) == rownames(dds)))
res$symbol <- rowData(dds)$SYMBOL
resOrdered <- res[order(res$padj), ]
head(resOrdered)
# log2 fold change (MLE): condition_name IFNg vs naive
# Wald test p-value: condition name IFNg vs naive
# DataFrame with 6 rows and 7 columns
# baseMean log2FoldChange lfcSE stat pvalue
# <numeric> <numeric> <numeric> <numeric> <numeric>
# ENSG00000125347.13 30487.254 5.55915 0.218390 25.4551 6.19761e-143
# ENSG00000111181.12 687.519 4.70999 0.195911 24.0415 1.02514e-127
# ENSG00000162645.12 36639.987 6.66498 0.286603 23.2551 1.26442e-119
# ENSG00000137496.17 7118.885 4.05787 0.177049 22.9195 2.97302e-116
# ENSG00000145365.10 3642.657 5.19246 0.237141 21.8961 2.82829e-106
# ENSG00000204257.14 1906.261 4.07091 0.190575 21.3612 3.06785e-101
# padj symbol
# <numeric> <character>
# ENSG00000125347.13 1.43971e-138 IRF1
# ENSG00000111181.12 1.19070e-123 SLC6A12
# ENSG00000162645.12 9.79081e-116 GBP2
# ENSG00000137496.17 1.72658e-112 IL18BP
# ENSG00000145365.10 1.31402e-102 TIFA
# ENSG00000204257.14 1.18777e-97 HLA-DMA
resOrderedDF <- as.data.frame(resOrdered)[seq_len(100), ]
write.table(cbind(id = rownames(resOrderedDF), resOrderedDF),
file = "myderesults.txt", quote = FALSE, sep = "\t",
row.names = FALSE)
## can we streamline things here?
res_enrich_topGO <- shake_topGOtableResult(topgo_DE_macrophage_IFNg_vs_naive)
res_enrich_clupro <- shake_enrichResult(clupro_DE_macrophage_IFNg_vs_naive)
gtl_macrophage <- GeneTonicList(
dds = dds,
res_de = res,
res_enrich = res_enrich_clupro,
annotation_obj = data.frame(
gene_id = rowData(dds)$gene_id,
gene_name = rowData(dds)$SYMBOL
)
)
## saving this
# saveRDS(gtl_macrophage, "gtl_macrophage.RDS")
## reloading this
# gtl_macrophage <- readRDS("gtl_macrophage.RDS")
## and that is it!
# GeneTonic(gtl = gtl_macrophage)
## or if expecting to upload at runtime... (e.g. used as a server-like app)
# GeneTonic()
sessionInfo()
# R version 4.4.0 (2024-04-24)
# Platform: x86_64-apple-darwin20
# Running under: macOS Monterey 12.7.1
#
# Matrix products: default
# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#
# locale:
# [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#
# time zone: Europe/Berlin
# tzcode source: internal
#
# attached base packages:
# [1] stats4 stats graphics grDevices utils datasets methods
# [8] base
#
# other attached packages:
# [1] SingleR_2.6.0 celldex_1.14.0
# [3] scran_1.32.0 scater_1.32.0
# [5] scuttle_1.14.0 TENxPBMCData_1.22.0
# [7] HDF5Array_1.32.0 rhdf5_2.48.0
# [9] DelayedArray_0.30.1 SparseArray_1.4.8
# [11] S4Arrays_1.4.1 abind_1.4-5
# [13] Matrix_1.7-0 lubridate_1.9.3
# [15] forcats_1.0.0 stringr_1.5.1
# [17] dplyr_1.1.4 purrr_1.0.2
# [19] readr_2.1.5 tidyr_1.3.1
# [21] tibble_3.2.1 tidyverse_2.0.0
# [23] ggthemes_5.1.0 gapminder_1.0.0
# [25] ggplot2_3.5.1 MASS_7.3-61
# [27] clusterProfiler_4.12.0 topGO_2.56.0
# [29] SparseM_1.83 GO.db_3.19.1
# [31] graph_1.82.0 GeneTonic_2.8.0
# [33] iSEEu_1.16.0 iSEEhex_1.6.0
# [35] iSEE_2.16.0 SingleCellExperiment_1.26.0
# [37] pheatmap_1.0.12 apeglm_1.26.1
# [39] pcaExplorer_2.30.0 bigmemory_4.6.4
# [41] edgeR_4.2.0 limma_3.60.3
# [43] ExploreModelMatrix_1.16.0 GenomicFeatures_1.57.0
# [45] org.Hs.eg.db_3.19.1 AnnotationDbi_1.66.0
# [47] DESeq2_1.44.0 SummarizedExperiment_1.34.0
# [49] Biobase_2.64.0 MatrixGenerics_1.16.0
# [51] matrixStats_1.3.0 GenomicRanges_1.56.1
# [53] GenomeInfoDb_1.40.1 IRanges_2.38.0
# [55] S4Vectors_0.42.0 BiocGenerics_0.50.0
# [57] tximeta_1.22.1 macrophage_1.20.0
# [59] rmarkdown_2.27 knitr_1.47
# [61] BiocStyle_2.32.1
#
# loaded via a namespace (and not attached):
# [1] igraph_2.0.3 ica_1.0-3
# [3] plotly_4.10.4 ca_0.71.1
# [5] listviewer_4.0.0 zlibbioc_1.50.0
# [7] tidyselect_1.2.1 bit_4.0.5
# [9] doParallel_1.0.17 clue_0.3-65
# [11] lattice_0.22-6 rjson_0.2.21
# [13] blob_1.2.4 rngtools_1.5.2
# [15] parallel_4.4.0 png_0.1-8
# [17] cli_3.6.2 ggplotify_0.1.2
# [19] registry_0.5-1 GOstats_2.70.0
# [21] ProtGenerics_1.36.0 rintrojs_0.3.4
# [23] goftest_1.2-3 BiocIO_1.14.0
# [25] bs4Dash_2.3.3 bluster_1.14.0
# [27] BiocNeighbors_1.22.0 uwot_0.2.2
# [29] tximport_1.32.0 dendextend_1.17.1
# [31] shadowtext_0.1.3 curl_5.2.1
# [33] mime_0.12 evaluate_0.24.0
# [35] tidytree_0.4.6 leiden_0.4.3.1
# [37] ComplexHeatmap_2.20.0 stringi_1.8.4
# [39] XML_3.99-0.16.1 httpuv_1.6.15
# [41] magrittr_2.0.3 rappdirs_0.3.3
# [43] splines_4.4.0 mclust_6.1.1
# [45] ggraph_2.2.1 webshot_0.5.5
# [47] DT_0.33 sctransform_0.4.1
# [49] ggbeeswarm_0.7.2 DBI_1.2.3
# [51] jquerylib_0.1.4 genefilter_1.86.0
# [53] withr_3.0.0 emdbook_1.3.13
# [55] lmtest_0.9-40 enrichplot_1.24.0
# [57] RBGL_1.80.0 GSEABase_1.66.0
# [59] bdsmatrix_1.3-7 tidygraph_1.3.1
# [61] formatR_1.14 colourpicker_1.3.0
# [63] rtracklayer_1.64.0 BiocManager_1.30.23
# [65] htmlwidgets_1.6.4 fs_1.6.4
# [67] biomaRt_2.60.0 ggrepel_0.9.5
# [69] labeling_0.4.3 shinycssloaders_1.0.0
# [71] reticulate_1.38.0 annotate_1.82.0
# [73] hexbin_1.28.3 zoo_1.8-12
# [75] XVector_0.44.0 UCSC.utils_1.0.0
# [77] timechange_0.3.0 foreach_1.5.2
# [79] fansi_1.0.6 patchwork_1.2.0
# [81] visNetwork_2.1.2 grid_4.4.0
# [83] data.table_1.15.4 ggtree_3.12.0
# [85] seriation_1.5.5 RSpectra_0.16-1
# [87] irlba_2.3.5.1 alabaster.schemas_1.4.0
# [89] fastDummies_1.7.3 gridGraphics_0.5-1
# [91] lazyeval_0.2.2 yaml_2.3.8
# [93] survival_3.7-0 scattermore_1.2
# [95] BiocVersion_3.19.1 crayon_1.5.3
# [97] RcppAnnoy_0.0.22 progressr_0.14.0
# [99] RColorBrewer_1.1-3 tweenr_2.0.3
# [101] later_1.3.2 Rgraphviz_2.48.0
# [103] ggridges_0.5.6 codetools_0.2-20
# [105] base64enc_0.1-3 GlobalOptions_0.1.2
# [107] Seurat_5.1.0 KEGGREST_1.44.1
# [109] bbmle_1.0.25.1 Rtsne_0.17
# [111] shape_1.4.6.1 icons_0.2.0
# [113] Rsamtools_2.20.0 filelock_1.0.3
# [115] shinyBS_0.61.1 pkgconfig_2.0.3
# [117] xml2_1.3.6 GenomicAlignments_1.40.0
# [119] aplot_0.2.3 alabaster.base_1.4.1
# [121] spatstat.sparse_3.1-0 ape_5.8
# [123] viridisLite_0.4.2 gridBase_0.4-7
# [125] xtable_1.8-4 Category_2.70.0
# [127] highr_0.11 plyr_1.8.9
# [129] httr_1.4.7 globals_0.16.3
# [131] tools_4.4.0 SeuratObject_5.0.2
# [133] beeswarm_0.4.0 AnnotationForge_1.46.0
# [135] nlme_3.1-165 xaringan_0.30
# [137] HDO.db_0.99.1 dbplyr_2.5.0
# [139] ExperimentHub_2.12.0 shinyjs_2.1.0
# [141] crosstalk_1.2.1 ComplexUpset_1.3.3
# [143] assertthat_0.2.1 digest_0.6.35
# [145] numDeriv_2016.8-1.1 bookdown_0.39
# [147] farver_2.1.2 tzdb_0.4.0
# [149] AnnotationFilter_1.28.0 reshape2_1.4.4
# [151] yulab.utils_0.1.4 viridis_0.6.5
# [153] glue_1.7.0 cachem_1.1.0
# [155] BiocFileCache_2.12.0 polyclip_1.10-6
# [157] generics_0.1.3 Biostrings_2.72.1
# [159] dynamicTreeCut_1.63-1 mvtnorm_1.2-5
# [161] parallelly_1.37.1 txdbmaker_1.0.0
# [163] statmod_1.5.0 RcppHNSW_0.6.0
# [165] ScaledMatrix_1.12.0 pbapply_1.7-2
# [167] httr2_1.0.1 spam_2.10-0
# [169] backbone_2.1.4 vroom_1.6.5
# [171] gson_0.1.0 dqrng_0.4.1
# [173] utf8_1.2.4 graphlayouts_1.1.1
# [175] alabaster.se_1.4.1 fontawesome_0.5.2
# [177] gridExtra_2.3 shiny_1.8.1.1
# [179] GenomeInfoDbData_1.2.12 rhdf5filters_1.16.0
# [181] RCurl_1.98-1.14 memoise_2.0.1
# [183] scales_1.3.0 threejs_0.3.3
# [185] gypsum_1.0.1 future_1.33.2
# [187] RANN_2.6.1 spatstat.data_3.1-2
# [189] bigmemory.sri_0.1.8 rstudioapi_0.16.0
# [191] cluster_2.1.6 spatstat.utils_3.0-5
# [193] fitdistrplus_1.1-11 hms_1.1.3
# [195] munsell_0.5.1 cowplot_1.1.3
# [197] colorspace_2.1-0 rlang_1.1.4
# [199] DelayedMatrixStats_1.26.0 sparseMatrixStats_1.16.0
# [201] shinyWidgets_0.8.6 dotCall64_1.1-1
# [203] shinydashboard_0.7.2 ggforce_0.4.2
# [205] circlize_0.4.16 mgcv_1.9-1
# [207] xfun_0.45 alabaster.matrix_1.4.1
# [209] heatmaply_1.5.0 coda_0.19-4.1
# [211] iterators_1.0.14 GOSemSim_2.30.0
# [213] treeio_1.28.0 Rhdf5lib_1.26.0
# [215] bitops_1.0-7 promises_1.3.0
# [217] scatterpie_0.2.3 RSQLite_2.3.7
# [219] qvalue_2.36.0 TSP_1.2-4
# [221] fgsea_1.30.0 compiler_4.4.0
# [223] alabaster.ranges_1.4.1 prettyunits_1.2.0
# [225] beachmat_2.20.0 listenv_0.9.1
# [227] Rcpp_1.0.12 tippy_0.1.0
# [229] tensor_1.5 AnnotationHub_3.12.0
# [231] BiocSingular_1.20.0 progress_1.2.3
# [233] uuid_1.2-0 BiocParallel_1.38.0
# [235] spatstat.random_3.2-3 archive_1.1.8
# [237] R6_2.5.1 fastmap_1.2.0
# [239] fastmatch_1.1-4 ROCR_1.0-11
# [241] vipor_0.4.7 ensembldb_2.28.0
# [243] rsvd_1.0.5 gtable_0.3.5
# [245] shinyAce_0.4.2 KernSmooth_2.23-24
# [247] miniUI_0.1.1.1 deldir_2.0-4
# [249] htmltools_0.5.8.1 bit64_4.0.5
# [251] spatstat.explore_3.2-7 lifecycle_1.0.4
# [253] restfulr_0.0.15 sass_0.4.9
# [255] vctrs_0.6.5 rsconnect_1.3.1
# [257] spatstat.geom_3.2-9 DOSE_3.30.1
# [259] NMF_0.27 ggfun_0.1.5
# [261] emo_0.0.0.9000 sp_2.1-4
# [263] future.apply_1.11.2 bslib_0.7.0
# [265] pillar_1.9.0 metapod_1.12.0
# [267] locfit_1.5-9.9 jsonlite_1.8.8
# [269] expm_0.999-9 GetoptLong_1.0.5